library(pacman)
p_load(data.table, edgeR, ggplot2, hrbrthemes, viridis, tidyverse, scales)
doc_theme <- theme_ipsum(
base_family = "Arial",
caption_margin = 12,
axis_title_size = 12,
axis_col = "black")
curated_names <- fread(
"../../curated_names.tsv")
aba_bed <- fread(
"../../CP046654.1.bed",
col.names = c(
"chromosome",
"left",
"right",
"locus_tag",
"gene_name",
"strand",
"coding",
"completeness"))
aba <- fread(
"../../all_counts_seal.tsv.gz",
header = FALSE,
col.names = c(
"spacer",
"count",
"condition"))
aba_key <- fread(
"../../aba_key.tsv")
aba_design <- fread(
"../../ABA1_experimental_design.tsv",
na.strings = c("NA"))
aba_genome <- aba_bed[
aba_key[, .(spacer, type, locus_tag, y_pred, target, offset)],
on = .(locus_tag)]
# define the experimental design space to only take into consideration "tubes"
aba_design <- aba_design[experiment == "tube"]
publication_design <- copy(aba_design)
# publication_design[, timing := gsub("T", "t", timing)]
publication_design[, rep := paste0("(", rep, ")")]
# keep only the counts that are in the experimental design space
aba <- aba[condition %in% aba_design$condition]
# convert single column into a table
aba_grid <-
data.table::dcast(
aba,
spacer ~ factor(condition, levels = unique(condition)),
value.var = "count",
fill = 0)
aba_grid_matrix <-
data.matrix(aba_grid[, -c("spacer")])
row.names(aba_grid_matrix) <- aba_grid$spacer
d <- dist(t(cpm(aba_grid_matrix)), method = "canberra")
d_fit <- cmdscale(d,eig = TRUE, k = 2) # k is the number of dim
d_scale <- data.table(d_fit$points, keep.rownames = "condition")
d_scale <- publication_design[d_scale, on = .(condition)]
d_scale[, Timing := timing]
d_scale[, Chemical := paste(drug, dose, "ug/mL")]
setnames(d_scale, c("V1", "V2"), c("Dimension 1", "Dimension 2"))
this_title <- "MDS by Sample"
plot_object <- d_scale %>%
arrange(dose) %>%
mutate(Chemical = factor(Chemical, levels = unique(Chemical))) %>%
ggplot(
aes(
x = `Dimension 1`,
y = `Dimension 2`,
fill = Chemical,
shape = Timing)) +
geom_point(size = 5) +
doc_theme +
# ggtitle(this_title) +
scale_fill_manual(
values = alpha(
c(
"grey", # no drug
"#a6cee3", # Imipenem 0.06
"#1f78b4", # Imipenem 0.09
"#b2df8a", # Meropenem 0.11
"#33a02c", # Meropenem 0.17
"#e31a1c", # Rifampicin 0.34
"#ff7f00" # Colistin 0.44
),
0.50)) +
scale_shape_manual(
name = "Timing",
values = c(21, 22, 23)) +
guides(
fill = guide_legend(override.aes = list(shape = 21, stroke = 0), order = 1))
print(plot_object)

ggsave("MDS.svg", width = 6, height = 4, dpi = 600, units = "in", device='svg')
samples_T1 <-
dcast(melt(aba_grid, id.vars = "spacer")[variable %in% aba_design[timing == "T1", condition]], spacer ~ variable, value.var = "value")
samples_T1_matrix <- data.matrix(samples_T1[, -1])
rownames(samples_T1_matrix) <- samples_T1$spacer
d <- dist(t(cpm(samples_T1_matrix)), method = "canberra")
d_fit <- cmdscale(d,eig = TRUE, k = 2) # k is the number of dim
d_scale <- data.table(d_fit$points, keep.rownames = "condition")
d_scale <- publication_design[d_scale, on = .(condition)]
d_scale[, Timing := timing]
d_scale[, Chemical := paste(drug, dose, "ug/mL")]
setnames(d_scale, c("V1", "V2"), c("Dimension 1", "Dimension 2"))
this_title <- "MDS by Sample at t1"
plot_object <- d_scale %>%
arrange(dose) %>%
mutate(Chemical = factor(Chemical, levels = unique(Chemical))) %>%
ggplot(
aes(
x = `Dimension 1`,
y = `Dimension 2`,
fill = Chemical)) +
geom_point(size = 5, shape = 22) +
doc_theme +
ggtitle(this_title) +
scale_fill_manual(
values = alpha(
c(
"grey",
"#a6cee3",
"#1f78b4",
"#b2df8a",
"#33a02c",
"#e31a1c",
"#ff7f00"),
0.35)) +
guides(
fill = guide_legend(
override.aes = list(shape = 22),
order = 1))
print(plot_object)

ggsave("MDS_T1.svg", width = 6, height = 4, dpi = 600, units = "in", device='svg')
samples_T1 <-
dcast(melt(aba_grid, id.vars = "spacer")[variable %in% aba_design[timing == "T2", condition]], spacer ~ variable, value.var = "value")
samples_T1_matrix <- data.matrix(samples_T1[, -1])
rownames(samples_T1_matrix) <- samples_T1$spacer
d <- dist(t(cpm(samples_T1_matrix)), method = "canberra")
d_fit <- cmdscale(d,eig = TRUE, k = 2) # k is the number of dim
d_scale <- data.table(d_fit$points, keep.rownames = "condition")
d_scale <- publication_design[d_scale, on = .(condition)]
d_scale[, Timing := timing]
d_scale[, Chemical := paste(drug, dose, "ug/mL")]
setnames(d_scale, c("V1", "V2"), c("Dimension 1", "Dimension 2"))
this_title <- "MDS by Sample at t2"
plot_object <- d_scale %>%
arrange(dose) %>%
mutate(Chemical = factor(Chemical, levels = unique(Chemical))) %>%
ggplot(
aes(
x = `Dimension 1`,
y = `Dimension 2`,
fill = Chemical)) +
geom_point(size = 5, shape = 23) +
doc_theme +
ggtitle(this_title) +
scale_fill_manual(
values = alpha(
c(
"grey",
"#a6cee3",
"#1f78b4",
"#b2df8a",
"#33a02c",
"#e31a1c",
"#ff7f00"),
0.35)) +
guides(
fill = guide_legend(
override.aes = list(shape = 23),
order = 1))
print(plot_object)

ggsave("MDS_T2.svg", width = 6, height = 4, dpi = 600, units = "in", device='svg')
aba.cpm <-
aba_grid_matrix %>%
cpm %>%
as_tibble(rownames = "spacer") %>%
pivot_longer(!spacer, names_to = "condition", values_to = "CPM") %>%
full_join(aba_design) %>%
mutate(rep = paste("Replicate", rep)) %>%
group_by(verbose, timing) %>%
nest %>%
mutate(
data = map(
data, ~.x %>% pivot_wider(id_cols = spacer, names_from = rep, values_from = CPM))) %>%
filter(timing != "T0") %>%
mutate(
correlation = paste0("(", round(
map_dbl(
data,
~cor(.$`Replicate 1`, .$`Replicate 2`)), 2), ")")) %>%
unite("Sample Name", c("verbose", "timing", "correlation"), sep = " ")
aba.cpm <- aba.cpm %>%
mutate(
plot = map2(
data,
`Sample Name`,
~ggplot(
.x,
aes(x = `Replicate 1`, y = `Replicate 2`)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, size = 0.5, colour = "red") +
doc_theme +
theme(legend.position = "bottom") +
ggtitle(paste("CPM:", .y)) +
scale_x_continuous(
trans = "log10",
limits = c(1, pmax(max(.$`Replicate 1`), max(.$`Replicate 2`)))) +
scale_y_continuous(
trans = "log10",
limits = c(1, pmax(max(.$`Replicate 1`), max(.$`Replicate 2`))))))
print(aba.cpm$plot)
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

##
## [[11]]

##
## [[12]]

##
## [[13]]

##
## [[14]]

aba.cpm %>% select(-plot) %>%
filter(`Sample Name` %like% "No drug") %>%
unnest(cols = `data`) %>%
ggplot(
aes(x = `Replicate 1`, y = `Replicate 2`)) +
geom_point() +
facet_wrap(facets = "`Sample Name`") +
scale_x_continuous(
trans = scales::pseudo_log_trans(base = 10),
breaks = c(0, 10^(1:6)),
labels = label_number_si()) +
scale_y_continuous(
trans = scales::pseudo_log_trans(base = 10),
breaks = c(0, 10^(1:6)),
labels = label_number_si()) +
geom_abline(intercept = 0, slope = 1, size = 0.5, colour = "red")
## Warning: `label_number_si()` was deprecated in scales 1.2.0.
## Please use the `scale_cut` argument of `label_number()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

aba.cpm %>% select(-plot) %>%
filter(!`Sample Name` %like% "No drug") %>%
unnest(cols = `data`) %>%
ggplot(
aes(x = `Replicate 1`, y = `Replicate 2`)) +
geom_point() +
facet_wrap(facets = "`Sample Name`") +
scale_x_continuous(
trans = scales::pseudo_log_trans(base = 10),
breaks = c(0, 10^(1:6)),
labels = label_number_si()) +
scale_y_continuous(
trans = scales::pseudo_log_trans(base = 10),
breaks = c(0, 10^(1:6)),
labels = label_number_si()) +
geom_abline(intercept = 0, slope = 1, size = 0.5, colour = "red")

cpm.max <- aba.cpm %>%
unnest(data) %>%
select(`Replicate 1`, `Replicate 2`) %>%
max
aba.cpm %>%
unnest(data) %>%
inner_join(aba_key) %>%
arrange(desc(type)) %>%
mutate(Type = type) %>%
filter(`Sample Name` %like% "No drug") %>%
ggplot(aes(x = `Replicate 1`, y = `Replicate 2`)) +
geom_abline(intercept = 0, slope = 1, size = 0.5, colour = "black") +
geom_point(aes(color = Type)) +
scale_colour_manual(
values =
c("control" = alpha("grey", 0.10),
"mismatch" = alpha("#1F78B4", 0.10),
"perfect" = alpha("#E31A1C", 0.5))) +
facet_wrap(facets = c("`Sample Name`")) +
theme(
legend.position = "bottom",
legend.key = element_rect(fill = "#ECECEC")) +
guides(colour = guide_legend(override.aes = list(alpha = 1, size = 5))) +
scale_x_continuous(
trans = scales::pseudo_log_trans(base = 10),
breaks = c(0, 10^(1:6)),
labels = label_number(scale_cut = cut_short_scale()),
limits = c(0, cpm.max)) +
scale_y_continuous(
trans = scales::pseudo_log_trans(base = 10),
breaks = c(0, 10^(1:6)),
labels = label_number(scale_cut = cut_short_scale()),
limits = c(0, cpm.max)) +
facet_wrap(facets = c("`Sample Name`")) +
doc_theme
## Joining, by = "spacer"

aba.cpm %>%
unnest(data) %>%
inner_join(aba_key) %>%
arrange(desc(type)) %>%
mutate(Type = type) %>%
filter(!`Sample Name` %like% "No drug") %>%
ggplot(aes(x = `Replicate 1`, y = `Replicate 2`)) +
geom_abline(intercept = 0, slope = 1, size = 0.5, colour = "black") +
geom_point(aes(color = Type)) +
scale_colour_manual(
values =
c("control" = alpha("grey", 0.15),
"mismatch" = alpha("#1F78B4", 0.15),
"perfect" = alpha("#E31A1C", 0.5))) +
facet_wrap(facets = c("`Sample Name`")) +
theme(
legend.position = "bottom",
legend.key = element_rect(fill = "#ECECEC")) +
guides(colour = guide_legend(override.aes = list(alpha = 1, size = 5))) +
scale_x_continuous(
trans = scales::pseudo_log_trans(base = 10),
breaks = c(0, 10^(1:6)),
labels = label_number(scale_cut = cut_short_scale()),
limits = c(0, cpm.max)) +
scale_y_continuous(
trans = scales::pseudo_log_trans(base = 10),
breaks = c(0, 10^(1:6)),
labels = label_number(scale_cut = cut_short_scale()),
limits = c(0, cpm.max)) +
facet_wrap(facets = c("`Sample Name`")) +
doc_theme
## Joining, by = "spacer"
